library(data.table)
library(tidyr)
library(dplyr)
library(purrr)
library(lubridate)
# ---- Run configuration ------------------------------------------------------

# Climate zone code for this run; it is embedded in every output file name.
CLIMATE_ZONE <- 'w'

# Demographic columns carried over onto the interval data.
DEM_COLS <- c(
  'prem_id',
  'PREMI_cec_climate_zone_cd',
  'PREMI_acct_id',
  'sp_id',
  'weather_station'
)

# Directory that receives the aggregated output tables.
OUT_DIR_TEMP <- 'E:/CEA_DATA/interval_data/electric/shifted_tou/'

# Output file names, one per year: time-of-use totals and the daily variant.
OUT_NAME_14       <- paste0('2014_tou_', CLIMATE_ZONE, '.txt')
OUT_NAME_14_DAILY <- paste0('2014_daily_', CLIMATE_ZONE, '.txt')
OUT_NAME_15       <- paste0('2015_tou_', CLIMATE_ZONE, '.txt')
OUT_NAME_15_DAILY <- paste0('2015_daily_', CLIMATE_ZONE, '.txt')
# Get the weatherstation data
#
# Two-pass read of the demographics file:
#   1. read.table() on the first five rows to learn a class for every column,
#      with a few columns forced to a specific type;
#   2. fread() on the whole file using those classes.
DEM_FILE <- 'E:/CEA_DATA/demographics_data/cea_demographics.txt'

# Columns whose class is overridden before the full read.
DEM_CHARACTER_COLS <- c('prem_id', 'sa_id', 'sp_id',
                        'HOME_hm_htg_typ_cd', 'pty_id', 'PREMI_acct_id')
DEM_NUMERIC_COLS <- c('FUEL_coal_heat_pct', 'FUEL_solar_heat_pct',
                      'FUEL_kerosene_oil_heat_pct')

dem_5rows <- read.table(DEM_FILE
                        , na.strings = '?'
                        , header = TRUE
                        , nrows = 5
                        , stringsAsFactors = TRUE)

# lapply() always yields one vector per column, whereas sapply() changes its
# return shape with the number of rows read.
dem_5rows[DEM_CHARACTER_COLS] <- lapply(dem_5rows[DEM_CHARACTER_COLS], as.character)
dem_5rows[DEM_NUMERIC_COLS] <- lapply(dem_5rows[DEM_NUMERIC_COLS], as.numeric)
dem_5rows[['HOME_hm_ac_typ_cd']] <- as.factor(dem_5rows[['HOME_hm_ac_typ_cd']])

# Named character vector (column name -> class). Only the first class of each
# column is kept so the result is always a plain character vector.
dem_classes <- vapply(dem_5rows, function(column) class(column)[1L], character(1))

DT_Dem <- fread(DEM_FILE,
                header = 'auto',
                sep = 'auto',
                na.strings = '?',
                stringsAsFactors = FALSE,
                colClasses = dem_classes,
                showProgress = FALSE)
# One row per interval file found on disk:
#   file  - the file name as listed
#   sp_id - the file name with its trailing '.csv' removed
# The extension is removed with an anchored, escaped pattern so that only a
# literal '.csv' at the end of the name is dropped (an unescaped '.csv' would
# match any character followed by 'csv', anywhere in the name).
sample_sp_id <- list.files('E:/CEA_DATA/interval_data/electric/hourly_by_sp_id/interval_14_15/')
sample_sp_id <- tibble::tibble(
  file = sample_sp_id,
  sp_id = sub(pattern = '\\.csv$', replacement = '', x = sample_sp_id)
)
# The dplyr verbs below operate on a plain data.frame copy of the demographics.
DT_Dem <- as.data.frame(DT_Dem)

# merge with demographic for weather station information
#
# Result: one row per climate zone. `sample_100` holds every matched file for
# the zone; `sample_10` ... `sample_85` hold random draws of that fraction of
# rows from `sample_100`.
# NOTE(review): the draws use replace = TRUE, so one file can appear more than
# once within a sample - confirm that this is intended.
sample_lookup <- sample_sp_id %>%
  inner_join(
    DT_Dem %>%
      select(
        prem_id,
        climate_zone_cd = PREMI_cec_climate_zone_cd,
        acct_id = PREMI_acct_id,
        sp_id,
        weather_station
      ),
    by = 'sp_id'
  ) %>%
  select(climate_zone_cd, file, sp_id, prem_id, acct_id, weather_station) %>%
  group_by(climate_zone_cd) %>%
  nest(.key = sample_100) %>%
  mutate(
    sample_10 = map(sample_100, ~ sample_frac(.x, size = 0.10, replace = TRUE)),
    sample_20 = map(sample_100, ~ sample_frac(.x, size = 0.20, replace = TRUE)),
    sample_30 = map(sample_100, ~ sample_frac(.x, size = 0.30, replace = TRUE)),
    sample_40 = map(sample_100, ~ sample_frac(.x, size = 0.40, replace = TRUE)),
    sample_50 = map(sample_100, ~ sample_frac(.x, size = 0.50, replace = TRUE)),
    sample_65 = map(sample_100, ~ sample_frac(.x, size = 0.65, replace = TRUE)),
    sample_75 = map(sample_100, ~ sample_frac(.x, size = 0.75, replace = TRUE)),
    sample_85 = map(sample_100, ~ sample_frac(.x, size = 0.85, replace = TRUE))
  ) %>%
  select(climate_zone_cd, sample_10:sample_85, sample_100)
# Time-of-use schedule passed to get_period() further down the script.
# Each element is named <season>_<period>_<day type> and holds integer values
# in 0:23 (presumably hours of the day - confirm against get_period()).
# Every "off" set is the complement, within 0:23, of the other periods listed
# for the same season and day type.
TOU_CURRENT <- local({
  all_hours <- 0:23

  winter_partial_wd <- 17:19
  summer_peak_wd <- 13:18
  summer_partial_wd <- c(10:12, 19:20)
  summer_partial_we <- 17:19

  list(
    winter_partial_weekday = winter_partial_wd,
    winter_off_weekday = setdiff(all_hours, winter_partial_wd),
    winter_off_weekend = all_hours,
    summer_peak_weekday = summer_peak_wd,
    summer_partial_weekday = summer_partial_wd,
    summer_off_weekday = setdiff(all_hours, c(summer_peak_wd, summer_partial_wd)),
    summer_partial_weekend = summer_partial_we,
    summer_off_weekend = setdiff(all_hours, summer_partial_we)
  )
})
gc()

# Directory holding one hourly interval file per sp_id.
IN_NAME <- 'E:/CEA_DATA/interval_data/electric/hourly_by_sp_id/interval_14_15/'

# File names in the 10% sample for the configured climate zone. The zone code
# comes from CLIMATE_ZONE (upper-cased to match the demographic coding) so the
# selection stays in step with the output file names.
sample_pop <- sample_lookup %>%
  filter(climate_zone_cd == toupper(CLIMATE_ZONE)) %>%
  mutate(file = sample_10 %>% map('file')) %>%
  select(file) %>%
  unnest() %>%
  flatten_chr()

# Fail early with a clear message instead of an obscure error further down
# when the zone has no sampled files.
if (length(sample_pop) == 0) {
  stop('No interval files sampled for climate zone ', toupper(CLIMATE_ZONE),
       call. = FALSE)
}

sample_pop <- paste0(IN_NAME, sample_pop)

# Read every sampled file and stack the results into one data.table.
d <- map(.x = sample_pop, .f = fread, header = TRUE, sep = 'auto'
         , check.names = TRUE
         , stringsAsFactors = TRUE
         , integer64 = 'character'
         , verbose = FALSE) %>% rbindlist()
# Input: one file with usage
# Output: separate files with usage for each year
START_TIME <- proc.time()
# OUT_DIR_TEMP <- paste0(OUT_LOC, subdir, '/')

# Add three columns by reference:
#   season  - 'summer' when `month` is one of month.abb[4:10], else 'winter'
#   year    - year of `date`
#   holiday - FALSE for every row
d[, c('season', 'year', 'holiday') := list(
  factor(ifelse(month %in% month.abb[4:10], 'summer', 'winter')),
  year(date),
  FALSE
)]

# Attach the demographic columns listed in DEM_COLS, keyed on sp_id.
DT_Dem <- as.data.table(DT_Dem)
setkeyv(d, 'sp_id')
setkeyv(DT_Dem, 'sp_id')
d <- merge(d, DT_Dem[,.SD, .SDcols = DEM_COLS])
setnames(
  d,
  old = c('PREMI_cec_climate_zone_cd', 'PREMI_acct_id'),
  new = c('climate_zone_cd', 'acct_id')
)

# Change to factor for space
# The identifier columns become factors and `customer` is added as
# "<prem_id>_<acct_id>", all in a single assignment.
d[
  , c('sp_id', 'prem_id', 'acct_id', 'weather_station', 'climate_zone_cd', 'customer') :=
    c(lapply(.SD, factor), list(paste0(prem_id, '_', acct_id)))
  , .SDcols = c('sp_id', 'prem_id', 'acct_id', 'weather_station', 'climate_zone_cd')
]
# Get the time of use data
# Split the hourly data by calendar year: 2014 rows -> train, 2015 -> test.
train <- subset(d, year == 2014)
test <- subset(d, year == 2015)

# Day-of-week features.
# For train, each 2014 date is also shifted forward by one year (date_2015) so
# that the same row carries the weekday it falls on in both years.
train[, day_2014 := lubridate::wday(date)] # differentiate days
train[, day_2015 := lubridate::wday(lubridate::ymd(date) + lubridate::years(1))] # differentiate days
train[, date_2015 := lubridate::ymd(date) + lubridate::years(1)] # differentiate days

# Day names are taken from the dates themselves: weekdays() only has methods
# for Date/POSIXt objects, so it cannot be applied to the numeric day-of-week
# values returned by wday(). The names are locale dependent.
train[, day_name_2014 := weekdays(lubridate::ymd(date))]
train[, day_name_2015 := weekdays(date_2015)]

test[, day_2015 := lubridate::wday(date)]

# Weekend flag: day-of-week values 1 and 7 (Sunday and Saturday under
# lubridate's default week start).
train[, weekend_2014 := day_2014 %in% c(1, 7)]
train[, weekend_2015 := day_2015 %in% c(1, 7)]
test[, weekend_2015 := day_2015 %in% c(1, 7)]

# train[date == '2014-08-03',]
# test[date == '2015-08-03',]
#
# Time-of-use period per row. get_period() is not defined in this script and
# must already be available in the session.
train[, period_2014 := get_period(TOU_CURRENT, season, weekend_2014, hour, holiday)]
train[, period_2015 := get_period(TOU_CURRENT, season, weekend_2015, hour, holiday)]
test[, period_2015 := get_period(TOU_CURRENT, season, weekend_2015, hour, holiday)]
# Convert to time of use data
# For each combination of service point, date and 2015 period, total the usage
# (ignoring NA) and count the contributing rows (column N). Both years are
# grouped on `period_2015`.
train_tou <- train[
  , list(usage = sum(usage, na.rm = TRUE), N = .N)
  , by = c('weather_station', 'sp_id', 'prem_id', 'acct_id', 'climate_zone_cd',
           'season', 'period_2015', 'year', 'date')
]
test_tou <- test[
  , list(usage = sum(usage, na.rm = TRUE), N = .N)
  , by = c('weather_station', 'sp_id', 'prem_id', 'acct_id', 'climate_zone_cd',
           'season', 'period_2015', 'year', 'date')
]

# Tab-separated output, unquoted, without row names.
write.table(
  x = train_tou,
  file = paste0(OUT_DIR_TEMP, OUT_NAME_14),
  sep = '\t',
  quote = FALSE,
  row.names = FALSE
)
write.table(
  x = test_tou,
  file = paste0(OUT_DIR_TEMP, OUT_NAME_15),
  sep = '\t',
  quote = FALSE,
  row.names = FALSE
)
# write.table(train, paste0(OUT_DIR_TEMP, OUT_NAME_14_DAILY), row.names = FALSE, quote = FALSE, sep = '\t')
# write.table(test, paste0(OUT_DIR_TEMP, OUT_NAME_15_DAILY), row.names = FALSE, quote = FALSE, sep = '\t')
#
# Completion report: number of distinct customers (acct_id + prem_id pairs),
# number of distinct sp_ids, and the time elapsed since START_TIME.
# The zone label comes from CLIMATE_ZONE, the constant this script defines;
# no variable named `climate_zone` exists here.
print(paste0(CLIMATE_ZONE, ' is complete (', uniqueN(d, by = c('acct_id', 'prem_id')), ' customers)'))
print(paste0(CLIMATE_ZONE, ' is complete (', uniqueN(d, by = c('sp_id')), ' sp_ids)'))
print(proc.time() - START_TIME)
# Batch runs: one read_aggregate() call per zone code ('t', 'w', 'x') for each
# of three sample labels.
# NOTE(review): read_aggregate() and TOU_ALT are not defined in this script;
# they must already exist in the session. Presumably read_aggregate() is a
# function form of the steps above - confirm.
read_aggregate('t', 'sample1')
read_aggregate('w', 'sample1')
read_aggregate('x', 'sample1')
read_aggregate('t', 'sample2')
read_aggregate('w', 'sample2')
read_aggregate('x', 'sample2')
# The sample3 runs pass two extra arguments: 3100 and the TOU_ALT schedule.
read_aggregate('t', 'sample3', 3100, TOU_ALT)
read_aggregate('w', 'sample3', 3100, TOU_ALT)
read_aggregate('x', 'sample3', 3100, TOU_ALT)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.